'''
Compute average reward per action (i.e., persuasive strategy).

Author: Nele Albers
Date: April 2022
'''
"""
Here, each action is a cluster of actions.
"""

import numpy as np
import Utils as util


def compute_avg_reward(data, effort_mean, num_act):
    """Compute the average reward per action (i.e., persuasive strategy).

    The raw value stored at position 3 of every sample is converted to a
    reward score through the ``util`` mapping helpers, and the scores are then
    averaged per action (position 2 of every sample).

    Args:
        data (list): List with samples of the form <s0, s1, a, r>.
        effort_mean (float): Mean value of the weighted sum of effort responses and dropout responses.
        num_act (int): Number of possible actions.

    Returns:
        np-array: Average reward per action. Actions that do not occur in
            ``data`` get an average of 0; if ``data`` is empty, all entries
            are 0.
    """
    # Without samples there is nothing to average. The column slice below
    # would fail on an empty array, so return the "no trials" result directly.
    # len() is used instead of truthiness so array-like inputs work as well.
    if len(data) == 0:
        return np.zeros(num_act)

    # Raw reward value (weighted sum) of every sample, cast to int.
    list_of_efforts = list(np.array(data, dtype=object)[:, 3].astype(int))

    # Map the raw values to reward scores using output bounds -1 and 1.
    # According to the original author, the mean is mapped to 0; the exact
    # mapping is implemented in util.
    map_to_rewards = util.get_map_effort_reward(effort_mean,
                                                output_lower_bound=-1,
                                                output_upper_bound=1,
                                                input_lower_bound=-5,
                                                input_upper_bound=15)
    reward_list = util.map_efforts_to_rewards(list_of_efforts, map_to_rewards)

    rewards = np.zeros(num_act)  # summed reward per action
    trials = np.zeros(num_act)   # number of samples per action

    for sample_index, sample in enumerate(data):
        action = int(sample[2])
        rewards[action] += reward_list[sample_index]
        trials[action] += 1

    # Average reward per action; actions without any trial keep the value 0
    # instead of triggering a division by zero.
    avg_reward = np.divide(rewards, trials,
                           out=np.zeros_like(rewards),
                           where=trials != 0)

    return avg_reward

